/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86-shared/Lowering-x86-shared.h"

#include "mozilla/MathAlgorithms.h"

#include "jit/MIR.h"

#include "jit/shared/Lowering-shared-inl.h"

using namespace js;
using namespace js::jit;

using mozilla::Abs;
using mozilla::FloorLog2;
using mozilla::Swap;

LTableSwitch*
LIRGeneratorX86Shared::newLTableSwitch(const LAllocation& in, const LDefinition& inputCopy,
                                       MTableSwitch* tableswitch)
{
    return new(alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
}

LTableSwitchV*
LIRGeneratorX86Shared::newLTableSwitchV(MTableSwitch* tableswitch)
{
    return new(alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)),
                                      temp(), tempDouble(), temp(), tableswitch);
}

void
LIRGeneratorX86Shared::visitGuardShape(MGuardShape* ins)
{
    MOZ_ASSERT(ins->object()->type() == MIRType::Object);

    LGuardShape* guard = new(alloc()) LGuardShape(useRegisterAtStart(ins->object()));
    assignSnapshot(guard, ins->bailoutKind());
    add(guard, ins);
    redefine(ins, ins->object());
}

void
LIRGeneratorX86Shared::visitGuardObjectGroup(MGuardObjectGroup* ins)
{
    MOZ_ASSERT(ins->object()->type() == MIRType::Object);

    LGuardObjectGroup* guard = new(alloc()) LGuardObjectGroup(useRegisterAtStart(ins->object()));
    assignSnapshot(guard, ins->bailoutKind());
    add(guard, ins);
    redefine(ins, ins->object());
}

void
LIRGeneratorX86Shared::visitPowHalf(MPowHalf* ins)
{
    MDefinition* input = ins->input();
    MOZ_ASSERT(input->type() == MIRType::Double);
    LPowHalfD* lir = new(alloc()) LPowHalfD(useRegisterAtStart(input));
    define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                     MDefinition* lhs, MDefinition* rhs)
{
    ins->setOperand(0, useRegisterAtStart(lhs));

    // The shift operand should be constant or in register ecx;
    // x86 can't shift a non-ecx register.
    if (rhs->isConstant())
        ins->setOperand(1, useOrConstantAtStart(rhs));
    else
        ins->setOperand(1, lhs != rhs ? useFixed(rhs, ecx) : useFixedAtStart(rhs, ecx));

    defineReuseInput(ins, mir, 0);
}
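
// 64-bit shifts and rotates. On 32-bit targets an int64 occupies two
// registers, so the instruction carries INT64_PIECES operands for the
// left-hand side plus one more for the shift count; the count, as above,
// must be a constant or live in ecx.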
template<size_t Temps>
void
LIRGeneratorX86Shared::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
                                          MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
{
    ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
#if defined(JS_NUNBOX32)
    if (mir->isRotate())
        ins->setTemp(0, temp());
#endif

    static_assert(LShiftI64::Rhs == INT64_PIECES, "Assume Rhs is located at INT64_PIECES.");
    static_assert(LRotateI64::Count == INT64_PIECES, "Assume Count is located at INT64_PIECES.");

    // The shift operand should be constant or in register ecx;
    // x86 can't shift a non-ecx register.
    if (rhs->isConstant()) {
        ins->setOperand(INT64_PIECES, useOrConstantAtStart(rhs));
    } else {
        // The operands are int64, but we only care about the lower 32 bits of
        // the RHS. On 32-bit, the code below will load that part in ecx and
        // will discard the upper half.
        ensureDefined(rhs);
        LUse use(ecx);
        use.setVirtualRegister(rhs->virtualRegister());
        ins->setOperand(INT64_PIECES, use);
    }

    defineInt64ReuseInput(ins, mir, 0);
}

template void LIRGeneratorX86Shared::lowerForShiftInt64(
    LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins, MDefinition* mir,
    MDefinition* lhs, MDefinition* rhs);
template void LIRGeneratorX86Shared::lowerForShiftInt64(
    LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 1>* ins, MDefinition* mir,
    MDefinition* lhs, MDefinition* rhs);

void
LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                                   MDefinition* input)
{
    ins->setOperand(0, useRegisterAtStart(input));
    defineReuseInput(ins, mir, 0);
}

void
LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                   MDefinition* lhs, MDefinition* rhs)
{
    ins->setOperand(0, useRegisterAtStart(lhs));
    ins->setOperand(1, lhs != rhs ? useOrConstant(rhs) : useOrConstantAtStart(rhs));
    defineReuseInput(ins, mir, 0);
}

template<size_t Temps>
void
LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
                                   MDefinition* lhs, MDefinition* rhs)
{
    // Without AVX, we'll need to use the x86 encodings where one of the
    // inputs must be the same location as the output.
    if (!Assembler::HasAVX()) {
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
        defineReuseInput(ins, mir, 0);
    } else {
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, useAtStart(rhs));
        define(ins, mir);
    }
}

template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins,
                                                 MDefinition* mir, MDefinition* lhs,
                                                 MDefinition* rhs);
template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 1>* ins,
                                                 MDefinition* mir, MDefinition* lhs,
                                                 MDefinition* rhs);

void
LIRGeneratorX86Shared::lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir,
                                       MDefinition* lhs, MDefinition* rhs)
{
    lowerForALU(ins, mir, lhs, rhs);
}

void
LIRGeneratorX86Shared::lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir,
                                       MDefinition* lhs, MDefinition* rhs)
{
    // Swap the operands around to fit the instructions that x86 actually has.
    // We do this here, before register allocation, so that we don't need
    // temporaries and copying afterwards.
    switch (mir->operation()) {
      case MSimdBinaryComp::greaterThan:
      case MSimdBinaryComp::greaterThanOrEqual:
        mir->reverse();
        Swap(lhs, rhs);
        break;
      default:
        break;
    }

    lowerForFPU(ins, mir, lhs, rhs);
}

void
LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
                                               MDefinition* lhs, MDefinition* rhs)
{
    baab->setOperand(0, useRegisterAtStart(lhs));
    baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
    add(baab, mir);
}

void
LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs)
{
    // Note: If we need a negative zero check, lhs is used twice.
    LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
    LMulI* lir = new(alloc()) LMulI(useRegisterAtStart(lhs), useOrConstant(rhs), lhsCopy);
    if (mul->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineReuseInput(lir, mul, 0);
}
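
// Signed integer division. x86's idiv takes its dividend in edx:eax and
// leaves the quotient in eax and the remainder in edx, which is why the
// general-case lowering below pins its output and temp to those registers.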
void
LIRGeneratorX86Shared::lowerDivI(MDiv* div)
{
    if (div->isUnsigned()) {
        lowerUDiv(div);
        return;
    }

    // Division instructions are slow. Division by constant denominators can be
    // rewritten to use other instructions.
    if (div->rhs()->isConstant()) {
        int32_t rhs = div->rhs()->toConstant()->toInt32();

        // Division by powers of two can be done by shifting, and division by
        // other numbers can be done by a reciprocal multiplication technique.
        int32_t shift = FloorLog2(Abs(rhs));
        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
            LAllocation lhs = useRegisterAtStart(div->lhs());
            LDivPowTwoI* lir;
            if (!div->canBeNegativeDividend()) {
                // Numerator is unsigned, so does not need adjusting.
                lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
            } else {
                // Numerator is signed, and needs adjusting, and an extra
                // lhs copy register is needed.
                lir = new(alloc()) LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
            }
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, div, 0);
            return;
        }
        if (rhs != 0) {
            LDivOrModConstantI* lir;
            lir = new(alloc()) LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
            return;
        }
    }

    LDivI* lir = new(alloc()) LDivI(useRegister(div->lhs()), useRegister(div->rhs()),
                                    tempFixed(edx));
    if (div->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

void
LIRGeneratorX86Shared::lowerModI(MMod* mod)
{
    if (mod->isUnsigned()) {
        lowerUMod(mod);
        return;
    }

    if (mod->rhs()->isConstant()) {
        int32_t rhs = mod->rhs()->toConstant()->toInt32();
        int32_t shift = FloorLog2(Abs(rhs));
        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, mod, 0);
            return;
        }
        if (rhs != 0) {
            LDivOrModConstantI* lir;
            lir = new(alloc()) LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
            return;
        }
    }

    LModI* lir = new(alloc()) LModI(useRegister(mod->lhs()), useRegister(mod->rhs()),
                                    tempFixed(eax));
    if (mod->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

void
LIRGeneratorX86Shared::visitWasmSelect(MWasmSelect* ins)
{
    if (ins->type() == MIRType::Int64) {
        auto* lir = new(alloc()) LWasmSelectI64(useInt64RegisterAtStart(ins->trueExpr()),
                                                useInt64(ins->falseExpr()),
                                                useRegister(ins->condExpr()));
        defineInt64ReuseInput(lir, ins, LWasmSelectI64::TrueExprIndex);
        return;
    }

    auto* lir = new(alloc()) LWasmSelect(useRegisterAtStart(ins->trueExpr()),
                                         use(ins->falseExpr()),
                                         useRegister(ins->condExpr()));
    defineReuseInput(lir, ins, LWasmSelect::TrueExprIndex);
}

void
LIRGeneratorX86Shared::visitWasmNeg(MWasmNeg* ins)
{
    switch (ins->type()) {
      case MIRType::Int32:
        defineReuseInput(new(alloc()) LNegI(useRegisterAtStart(ins->input())), ins, 0);
        break;
      case MIRType::Float32:
        defineReuseInput(new(alloc()) LNegF(useRegisterAtStart(ins->input())), ins, 0);
        break;
      case MIRType::Double:
        defineReuseInput(new(alloc()) LNegD(useRegisterAtStart(ins->input())), ins, 0);
        break;
      default:
        MOZ_CRASH();
    }
}
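
// Unsigned division and modulus use the same edx:eax register pairing as
// the signed forms above; the constant-divisor fast paths mirror those in
// lowerDivI and lowerModI.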
void
LIRGeneratorX86Shared::lowerUDiv(MDiv* div)
{
    if (div->rhs()->isConstant()) {
        uint32_t rhs = div->rhs()->toConstant()->toInt32();
        int32_t shift = FloorLog2(rhs);

        LAllocation lhs = useRegisterAtStart(div->lhs());
        if (rhs != 0 && uint32_t(1) << shift == rhs) {
            LDivPowTwoI* lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, false);
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, div, 0);
        } else {
            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(div->lhs()),
                                                                      rhs, tempFixed(eax));
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
        }
        return;
    }

    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(div->lhs()),
                                              useRegister(div->rhs()),
                                              tempFixed(edx));
    if (div->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

void
LIRGeneratorX86Shared::lowerUMod(MMod* mod)
{
    if (mod->rhs()->isConstant()) {
        uint32_t rhs = mod->rhs()->toConstant()->toInt32();
        int32_t shift = FloorLog2(rhs);

        if (rhs != 0 && uint32_t(1) << shift == rhs) {
            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, mod, 0);
        } else {
            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(mod->lhs()),
                                                                      rhs, tempFixed(edx));
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
        }
        return;
    }

    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(mod->lhs()),
                                              useRegister(mod->rhs()),
                                              tempFixed(eax));
    if (mod->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

void
LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir)
{
    MDefinition* lhs = mir->lhs();
    MDefinition* rhs = mir->rhs();

    MOZ_ASSERT(lhs->type() == MIRType::Int32);
    MOZ_ASSERT(rhs->type() == MIRType::Int32);
    MOZ_ASSERT(mir->type() == MIRType::Double);

#ifdef JS_CODEGEN_X64
    MOZ_ASSERT(ecx == rcx);
#endif

    LUse lhsUse = useRegisterAtStart(lhs);
    LAllocation rhsAlloc = rhs->isConstant() ? useOrConstant(rhs) : useFixed(rhs, ecx);

    LUrshD* lir = new(alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
    define(lir, mir);
}

void
LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins)
{
    MDefinition* opd = ins->input();
    MOZ_ASSERT(opd->type() == MIRType::Double);

    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
    define(new(alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
}

void
LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins)
{
    MDefinition* opd = ins->input();
    MOZ_ASSERT(opd->type() == MIRType::Float32);

    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
    define(new(alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
}
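
// Atomic compare-and-exchange on a typed array element. The underlying
// instruction is CMPXCHG, which implicitly takes the expected old value in
// eax and leaves the observed memory value there.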
void
LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins,
                                                             bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);

    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    // If the target is a floating register then we need a temp at the
    // lower level; that temp must be eax.
    //
    // Otherwise the target (if used) is an integer register, which
    // must be eax. If the target is not used the machine code will
    // still clobber eax, so just pretend it's used.
    //
    // oldval must be in a register.
    //
    // newval must be in a register. If the source is a byte array
    // then newval must be a register that has a byte size: on x86
    // this must be ebx, ecx, or edx (eax is taken for the output).
    //
    // Bug #1077036 describes some further optimization opportunities.

    bool fixedOutput = false;
    LDefinition tempDef = LDefinition::BogusTemp();
    LAllocation newval;
    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
        tempDef = tempFixed(eax);
        newval = useRegister(ins->newval());
    } else {
        fixedOutput = true;
        if (useI386ByteRegisters && ins->isByteArray())
            newval = useFixed(ins->newval(), ebx);
        else
            newval = useRegister(ins->newval());
    }

    const LAllocation oldval = useRegister(ins->oldval());

    LCompareExchangeTypedArrayElement* lir =
        new(alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, newval, tempDef);

    if (fixedOutput)
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else
        define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins,
                                                            bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);

    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());
    const LAllocation value = useRegister(ins->value());

    // The underlying instruction is XCHG, which can operate on any
    // register.
    //
    // If the target is a floating register (for Uint32) then we need
    // a temp into which to exchange.
    //
    // If the source is a byte array then we need a register that has
    // a byte size; in this case -- on x86 only -- pin the output to
    // an appropriate register and use that as a temp in the back-end.

    LDefinition tempDef = LDefinition::BogusTemp();
    if (ins->arrayType() == Scalar::Uint32) {
        // This restriction is bug 1077305.
        MOZ_ASSERT(ins->type() == MIRType::Double);
        tempDef = temp();
    }

    LAtomicExchangeTypedArrayElement* lir =
        new(alloc()) LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);

    if (useI386ByteRegisters && ins->isByteArray())
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else
        define(lir, ins);
}
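
// Atomic read-modify-write (add, sub, and, or, xor) on a typed array
// element. The register constraints differ depending on whether the result
// of the operation is observed; see the case analysis in the comments below.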
void
LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins,
                                                         bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);

    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    // Case 1: the result of the operation is not used.
    //
    // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
    // LOCK OR, or LOCK XOR. We can do this even for the Uint32 case.

    if (!ins->hasUses()) {
        LAllocation value;
        if (useI386ByteRegisters && ins->isByteArray() && !ins->value()->isConstant())
            value = useFixed(ins->value(), ebx);
        else
            value = useRegisterOrConstant(ins->value());

        LAtomicTypedArrayElementBinopForEffect* lir =
            new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value);

        add(lir, ins);
        return;
    }

    // Case 2: the result of the operation is used.
    //
    // For ADD and SUB we'll use XADD:
    //
    //    movl       src, output
    //    lock xaddl output, mem
    //
    // For the 8-bit variants, XADD needs a byte register for the output.
    //
    // For AND/OR/XOR we need to use a CMPXCHG loop:
    //
    //    movl          *mem, eax
    // L: mov           eax, temp
    //    andl          src, temp
    //    lock cmpxchg  temp, mem  ; reads eax also
    //    jnz           L
    //    ; result in eax
    //
    // Note the placement of L: cmpxchg will update eax with *mem if
    // *mem does not have the expected value, so reloading it at the
    // top of the loop would be redundant.
    //
    // If the array is not a uint32 array then:
    //  - eax should be the output (one result of the cmpxchg)
    //  - there is a temp, which must have a byte register if
    //    the array has 1-byte elements
    //
    // If the array is a uint32 array then:
    //  - eax is the first temp
    //  - we also need a second temp
    //
    // There are optimization opportunities:
    //  - better register allocation in the x86 8-bit case, Bug #1077036.

    bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
    bool fixedOutput = true;
    bool reuseInput = false;
    LDefinition tempDef1 = LDefinition::BogusTemp();
    LDefinition tempDef2 = LDefinition::BogusTemp();
    LAllocation value;

    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
        value = useRegisterOrConstant(ins->value());
        fixedOutput = false;
        if (bitOp) {
            tempDef1 = tempFixed(eax);
            tempDef2 = temp();
        } else {
            tempDef1 = temp();
        }
    } else if (useI386ByteRegisters && ins->isByteArray()) {
        if (ins->value()->isConstant())
            value = useRegisterOrConstant(ins->value());
        else
            value = useFixed(ins->value(), ebx);
        if (bitOp)
            tempDef1 = tempFixed(ecx);
    } else if (bitOp) {
        value = useRegisterOrConstant(ins->value());
        tempDef1 = temp();
    } else if (ins->value()->isConstant()) {
        fixedOutput = false;
        value = useRegisterOrConstant(ins->value());
    } else {
        fixedOutput = false;
        reuseInput = true;
        value = useRegisterAtStart(ins->value());
    }

    LAtomicTypedArrayElementBinop* lir =
        new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);

    if (fixedOutput)
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else if (reuseInput)
        defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
    else
        define(lir, ins);
}
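
// SIMD lowerings. Most of these reuse the lhs input as the output because
// the non-AVX SSE encodings are two-operand (destructive); see lowerForFPU.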
constant");}}voidLIRGeneratorX86Shared::visitSimdExtractElement(MSimdExtractElement*ins){MOZ_ASSERT(IsSimdType(ins->input()->type()));MOZ_ASSERT(!IsSimdType(ins->type()));switch(ins->input()->type()){caseMIRType::Int8x16:caseMIRType::Int16x8:caseMIRType::Int32x4:{MOZ_ASSERT(ins->signedness()!=SimdSign::NotApplicable);LUseuse=useRegisterAtStart(ins->input());if(ins->type()==MIRType::Double){// Extract an Uint32 lane into a double.MOZ_ASSERT(ins->signedness()==SimdSign::Unsigned);define(new(alloc())LSimdExtractElementU2D(use,temp()),ins);}else{auto*lir=new(alloc())LSimdExtractElementI(use);#if defined(JS_CODEGEN_X86)// On x86 (32-bit), we may need to use movsbl or movzbl instructions// to sign or zero extend the extracted lane to 32 bits. The 8-bit// version of these instructions require a source register that is// %al, %bl, %cl, or %dl.// Fix it to %ebx since we can't express that constraint better.if(ins->input()->type()==MIRType::Int8x16){defineFixed(lir,ins,LAllocation(AnyRegister(ebx)));return;}#endifdefine(lir,ins);}break;}caseMIRType::Float32x4:{MOZ_ASSERT(ins->signedness()==SimdSign::NotApplicable);LUseuse=useRegisterAtStart(ins->input());define(new(alloc())LSimdExtractElementF(use),ins);break;}caseMIRType::Bool8x16:caseMIRType::Bool16x8:caseMIRType::Bool32x4:{MOZ_ASSERT(ins->signedness()==SimdSign::NotApplicable);LUseuse=useRegisterAtStart(ins->input());define(new(alloc())LSimdExtractElementB(use),ins);break;}default:MOZ_CRASH("Unknown SIMD kind when extracting element");}}voidLIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith*ins){MOZ_ASSERT(IsSimdType(ins->lhs()->type()));MOZ_ASSERT(IsSimdType(ins->rhs()->type()));MOZ_ASSERT(IsSimdType(ins->type()));MDefinition*lhs=ins->lhs();MDefinition*rhs=ins->rhs();if(ins->isCommutative())ReorderCommutative(&lhs,&rhs,ins);switch(ins->type()){caseMIRType::Int8x16:{LSimdBinaryArithIx16*lir=new(alloc())LSimdBinaryArithIx16();lir->setTemp(0,LDefinition::BogusTemp());lowerForFPU(lir,ins,lhs,rhs);return;}caseMIRType::Int16x8:{LSimdBinaryArithIx8*lir=new(alloc())LSimdBinaryArithIx8();lir->setTemp(0,LDefinition::BogusTemp());lowerForFPU(lir,ins,lhs,rhs);return;}caseMIRType::Int32x4:{LSimdBinaryArithIx4*lir=new(alloc())LSimdBinaryArithIx4();boolneedsTemp=ins->operation()==MSimdBinaryArith::Op_mul&&!MacroAssembler::HasSSE41();lir->setTemp(0,needsTemp?temp(LDefinition::SIMD128INT):LDefinition::BogusTemp());lowerForFPU(lir,ins,lhs,rhs);return;}caseMIRType::Float32x4:{LSimdBinaryArithFx4*lir=new(alloc())LSimdBinaryArithFx4();boolneedsTemp=ins->operation()==MSimdBinaryArith::Op_max||ins->operation()==MSimdBinaryArith::Op_minNum||ins->operation()==MSimdBinaryArith::Op_maxNum;lir->setTemp(0,needsTemp?temp(LDefinition::SIMD128FLOAT):LDefinition::BogusTemp());lowerForFPU(lir,ins,lhs,rhs);return;}default:MOZ_CRASH("unknown simd type on binary arith 
operation");}}voidLIRGeneratorX86Shared::visitSimdBinarySaturating(MSimdBinarySaturating*ins){MOZ_ASSERT(IsSimdType(ins->lhs()->type()));MOZ_ASSERT(IsSimdType(ins->rhs()->type()));MOZ_ASSERT(IsSimdType(ins->type()));MDefinition*lhs=ins->lhs();MDefinition*rhs=ins->rhs();if(ins->isCommutative())ReorderCommutative(&lhs,&rhs,ins);LSimdBinarySaturating*lir=new(alloc())LSimdBinarySaturating();lowerForFPU(lir,ins,lhs,rhs);}voidLIRGeneratorX86Shared::visitSimdSelect(MSimdSelect*ins){MOZ_ASSERT(IsSimdType(ins->type()));LSimdSelect*lins=new(alloc())LSimdSelect;MDefinition*r0=ins->getOperand(0);MDefinition*r1=ins->getOperand(1);MDefinition*r2=ins->getOperand(2);lins->setOperand(0,useRegister(r0));lins->setOperand(1,useRegister(r1));lins->setOperand(2,useRegister(r2));lins->setTemp(0,temp(LDefinition::SIMD128FLOAT));define(lins,ins);}voidLIRGeneratorX86Shared::visitSimdSplat(MSimdSplat*ins){LAllocationx=useRegisterAtStart(ins->getOperand(0));switch(ins->type()){caseMIRType::Int8x16:define(new(alloc())LSimdSplatX16(x),ins);break;caseMIRType::Int16x8:define(new(alloc())LSimdSplatX8(x),ins);break;caseMIRType::Int32x4:caseMIRType::Float32x4:caseMIRType::Bool8x16:caseMIRType::Bool16x8:caseMIRType::Bool32x4:// Use the SplatX4 instruction for all boolean splats. Since the input// value is a 32-bit int that is either 0 or -1, the X4 splat gives// the right result for all boolean geometries.// For floats, (Non-AVX) codegen actually wants the input and the output// to be in the same register, but we can't currently use// defineReuseInput because they have different types (scalar vs// vector), so a spill slot for one may not be suitable for the other.define(new(alloc())LSimdSplatX4(x),ins);break;default:MOZ_CRASH("Unknown SIMD kind");}}voidLIRGeneratorX86Shared::visitSimdValueX4(MSimdValueX4*ins){switch(ins->type()){caseMIRType::Float32x4:{// Ideally, x would be used at start and reused for the output, however// register allocation currently doesn't permit us to tie together two// virtual registers with different types.LAllocationx=useRegister(ins->getOperand(0));LAllocationy=useRegister(ins->getOperand(1));LAllocationz=useRegister(ins->getOperand(2));LAllocationw=useRegister(ins->getOperand(3));LDefinitiont=temp(LDefinition::SIMD128FLOAT);define(new(alloc())LSimdValueFloat32x4(x,y,z,w,t),ins);break;}caseMIRType::Bool32x4:caseMIRType::Int32x4:{// No defineReuseInput => useAtStart for everyone.LAllocationx=useRegisterAtStart(ins->getOperand(0));LAllocationy=useRegisterAtStart(ins->getOperand(1));LAllocationz=useRegisterAtStart(ins->getOperand(2));LAllocationw=useRegisterAtStart(ins->getOperand(3));define(new(alloc())LSimdValueInt32x4(x,y,z,w),ins);break;}default:MOZ_CRASH("Unknown SIMD kind");}}voidLIRGeneratorX86Shared::visitSimdSwizzle(MSimdSwizzle*ins){MOZ_ASSERT(IsSimdType(ins->input()->type()));MOZ_ASSERT(IsSimdType(ins->type()));if(IsIntegerSimdType(ins->input()->type())){LUseuse=useRegisterAtStart(ins->input());LSimdSwizzleI*lir=new(alloc())LSimdSwizzleI(use);define(lir,ins);// We need a GPR temp register for pre-SSSE3 codegen (no vpshufb).if(Assembler::HasSSSE3()){lir->setTemp(0,LDefinition::BogusTemp());}else{// The temp must be a GPR usable with 8-bit loads and stores.#if defined(JS_CODEGEN_X86)lir->setTemp(0,tempFixed(ebx));#elselir->setTemp(0,temp());#endif}}elseif(ins->input()->type()==MIRType::Float32x4){LUseuse=useRegisterAtStart(ins->input());LSimdSwizzleF*lir=new(alloc())LSimdSwizzleF(use);define(lir,ins);lir->setTemp(0,LDefinition::BogusTemp());}else{MOZ_CRASH("Unknown SIMD kind when getting 
lane");}}voidLIRGeneratorX86Shared::visitSimdShuffle(MSimdShuffle*ins){MOZ_ASSERT(IsSimdType(ins->lhs()->type()));MOZ_ASSERT(IsSimdType(ins->rhs()->type()));MOZ_ASSERT(IsSimdType(ins->type()));if(ins->type()==MIRType::Int32x4||ins->type()==MIRType::Float32x4){boolzFromLHS=ins->lane(2)<4;boolwFromLHS=ins->lane(3)<4;uint32_tlanesFromLHS=(ins->lane(0)<4)+(ins->lane(1)<4)+zFromLHS+wFromLHS;LSimdShuffleX4*lir=new(alloc())LSimdShuffleX4();lowerForFPU(lir,ins,ins->lhs(),ins->rhs());// See codegen for requirements details.LDefinitiontemp=(lanesFromLHS==3)?tempCopy(ins->rhs(),1):LDefinition::BogusTemp();lir->setTemp(0,temp);}else{MOZ_ASSERT(ins->type()==MIRType::Int8x16||ins->type()==MIRType::Int16x8);LSimdShuffle*lir=new(alloc())LSimdShuffle();lir->setOperand(0,useRegister(ins->lhs()));lir->setOperand(1,useRegister(ins->rhs()));define(lir,ins);// We need a GPR temp register for pre-SSSE3 codegen, and an SSE temp// when using pshufb.if(Assembler::HasSSSE3()){lir->setTemp(0,temp(LDefinition::SIMD128INT));}else{// The temp must be a GPR usable with 8-bit loads and stores.#if defined(JS_CODEGEN_X86)lir->setTemp(0,tempFixed(ebx));#elselir->setTemp(0,temp());#endif}}}voidLIRGeneratorX86Shared::visitSimdGeneralShuffle(MSimdGeneralShuffle*ins){MOZ_ASSERT(IsSimdType(ins->type()));LSimdGeneralShuffleBase*lir;if(IsIntegerSimdType(ins->type())){#if defined(JS_CODEGEN_X86)// The temp register must be usable with 8-bit load and store// instructions, so one of %eax-%edx.LDefinitiont;if(ins->type()==MIRType::Int8x16)t=tempFixed(ebx);elset=temp();#elseLDefinitiont=temp();#endiflir=new(alloc())LSimdGeneralShuffleI(t);}elseif(ins->type()==MIRType::Float32x4){lir=new(alloc())LSimdGeneralShuffleF(temp());}else{MOZ_CRASH("Unknown SIMD kind when doing a shuffle");}if(!lir->init(alloc(),ins->numVectors()+ins->numLanes()))return;for(unsignedi=0;i<ins->numVectors();i++){MOZ_ASSERT(IsSimdType(ins->vector(i)->type()));lir->setOperand(i,useRegister(ins->vector(i)));}for(unsignedi=0;i<ins->numLanes();i++){MOZ_ASSERT(ins->lane(i)->type()==MIRType::Int32);// Note that there can be up to 16 lane arguments, so we can't assume// that they all get an allocated register.lir->setOperand(i+ins->numVectors(),use(ins->lane(i)));}assignSnapshot(lir,Bailout_BoundsCheck);define(lir,ins);}voidLIRGeneratorX86Shared::visitCopySign(MCopySign*ins){MDefinition*lhs=ins->lhs();MDefinition*rhs=ins->rhs();MOZ_ASSERT(IsFloatingPointType(lhs->type()));MOZ_ASSERT(lhs->type()==rhs->type());MOZ_ASSERT(lhs->type()==ins->type());LInstructionHelper<1,2,2>*lir;if(lhs->type()==MIRType::Double)lir=new(alloc())LCopySignD();elselir=new(alloc())LCopySignF();// As lowerForFPU, but we want rhs to be in a FP register too.lir->setOperand(0,useRegisterAtStart(lhs));lir->setOperand(1,lhs!=rhs?useRegister(rhs):useRegisterAtStart(rhs));if(!Assembler::HasAVX())defineReuseInput(lir,ins,0);elsedefine(lir,ins);}